/* Set the working directory, clear memory, and load the 2005to2011 panel file
   that the rest of this do-file merges onto and transforms. */
cd "C:\Users\ejm5\Dropbox\IDS_PAPI"
clear all
/* From this line on, every command must be terminated by a semicolon. */
#delimit;
/* NOTE(review): set mem is only meaningful on older Stata releases - confirm which version this file targets. */
set mem 3g;
use  "C:\Users\ejm5\Dropbox\GSO Enterprise Census (1)\20130623_PanelData\2005to2011_panel.dta", clear;
sort pci_id year;
/* The loaded file must already contain a _merge variable (this drop fails otherwise).
   It is removed so the merges below can create a fresh one. */
drop _merge;

#delimit;
/* Attach selected columns from the 5yr_timeseries_v2 file, matching many panel rows
   to one using row per pci_id and year.
   NOTE(review): the original keepusing list named f6 twice. The duplicate is removed here.
   Every neighbouring item carries a _new suffix, so the second f6 may have been meant
   as a different variable - confirm against the using file. */
merge m:1 pci_id year
    using  "C:\data\IDS_PAPI\5yr_timeseries_v2.dta",
    keepusing(
        planning legal planning_avg legal_avg
        f2_new g94_new f4_new f5_new f5_1_new
        f6 f7_new
        web_04 change_web
        dim3_1_access dim3_2_equity dim3_3_predict dim3_4_open
    );

drop _merge;

/* Attach every column of the PCI_ts_provincelevel-w_cutoffs file on the same key. */
merge m:1 pci_id year
    using  "C:\Users\ejm5\Dropbox\IDS_PAPI\data\PCI_ts_provincelevel-w_cutoffs.dta";

/* Rows with no iddn2 cannot belong to the iddn2-by-year panel declared next, so they are removed first. */
drop if iddn2==.;
xtset iddn2 year;

drop _merge;
sort pci_id year;

/* Attach the chairman variables from birthofchairman2, matching on pci_id and year.
   The original keepusing list repeated chairmanname and birthofchairman. Each name is
   listed once here, which requests the same set of variables. */
merge m:1 pci_id year
    using "C:\Users\ejm5\Dropbox\IDS_PAPI\data\birthofchairman2.dta",
    keepusing(
        chairmanname birthofchairman
        PCOM_age PCOM_age_sq PCOM_code
        leader_remains leadership_change retirement
        chairmanterm
    );

drop _merge;
sort pci_id year;

/* Attach telephones_total and rebuild tel_cap from it. The tel_cap already in memory is
   discarded and replaced by telephones_total divided by population. */
merge m:1 pci_id year
    using "C:\Users\ejm5\Dropbox\IDS_PAPI\data\telephones_cutdown.dta",
    keepusing(telephones_total);
drop tel_cap;
generate tel_cap=telephones_total/population;

/* Remove rows without an iddn2 and re-declare the panel.
   The _merge variable from the telephone merge is left in place at this point. */
drop if iddn2==.;
xtset iddn2 year;


/*Variable Coding, Transformation, and Cleaning*/
/* Ownership indicators derived from ent_type:
     soe     = 1 for codes 1 to 4, 0 for codes above 4
     fdi     = 1 for codes above 11, 0 otherwise
     private = 1 for codes strictly between 6 and 11, 0 otherwise
   Stata orders missing values above every number, so an unguarded ent_type>11 or
   ent_type>4 is true when ent_type is missing. Each comparison is therefore
   restricted to non-missing ent_type. Rows with missing ent_type keep their existing soe
   and get missing fdi and private instead of being coded as FDI firms. */
replace soe=1 if inrange(ent_type,1,4);
replace soe=0 if ent_type>4 & !missing(ent_type);
generate fdi=(ent_type>11) if !missing(ent_type);
generate private=(ent_type>6 & ent_type<11) if !missing(ent_type);

/* Keep only rows classified as private or FDI. Rows with missing ent_type are not
   dropped here, exactly as before, because their fdi is not equal to 0. */
drop if private==0 & fdi==0;

/* Remove rows with negative revenue.
   NOTE(review): the commands below refer to revenues, so revenue here presumably
   resolves through variable-name abbreviation - confirm no separate revenue variable exists. */
drop if revenue<0;
/* hhi is not a built-in Stata command, presumably a user-written package - confirm it is installed.
   The calls are made by pci_id and year, and the hhi_ variables used in pwcorr below are presumably their output. */
hhi revenues, by(pci_id year);
hhi labour, by(pci_id year);
hhi investment, by(pci_id year);
/* Pairwise correlations between the three hhi_ variables, with the star(5) option. */
pwcorr hhi_revenues hhi_labour hhi_investment, star(5);

#delimit;
/* One-period lags via the l. operator, which relies on the xtset iddn2 year declaration made earlier in this file. */
generate lag_trans3=l.sub3_trans;
generate lag_hhirev=l.hhi_revenues;

/* Number the rows of each iddn2 in year order, so that panel_count==1 is always the
   earliest year on record for that iddn2. Sorting on iddn2 alone leaves the order of
   rows inside each iddn2 unspecified, which made the numbering depend on whatever order
   the data happened to be in. year is therefore added as a secondary sort key. */
bysort iddn2 (year): generate panel_count=_n;

/* Largest row number within each iddn2, i.e. how many rows that iddn2 has. */
by iddn2: egen max_panel_count=max(panel_count);

/* Within each pci_id and year, add up panel_count over the rows where it equals 1.
   The result is stored only on those rows and is missing elsewhere.
   total() is the current name of the egen function formerly called sum(). */
bysort pci_id year: egen new_entrants=total(panel_count) if panel_count==1;

/* Remove every iddn2 that appears only once, then any remaining rows flagged soe. */
drop if max_panel_count==1;
drop if soe==1;


/* For each listed variable, fill missing values with the median of that variable
   within the same iddn2. The helper median column carries a 2 suffix and is dropped
   again once the fill is done. */
foreach trait in qualifications owner_sex owner_birthyear ethnicity {;
    bysort iddn2: egen `trait'2=median(`trait');
    replace `trait'=`trait'2 if `trait'==.;
    drop `trait'2;
};

#delimit;
/* Owner age in each panel year, and its square. */
generate owner_age=year-owner_birthyear;
generate owner_age_sq=owner_age^2;

/* Hand corrections for specific startyear values that look like keying errors. */
replace startyear=2003 if startyear==3003;
replace startyear=1932 if startyear==2032;
replace startyear=2000 if startyear==200;
replace startyear=2003 if startyear==203;
replace startyear=1996 if startyear==196;
/* Values below 10 have 2000 added. After that step only values from 10 up to 29 are still below 30, and those have 1900 added. */
/* NOTE(review): values from 30 to 99 are left untouched and values 10 to 29 become 1910 to 1929 - confirm this is the intended treatment of two-digit years. */
replace startyear=2000+startyear if startyear<10;
replace startyear=1900+startyear if startyear<30;

/* Firm age in each panel year, and its square, from the cleaned startyear. */
generate firm_age=year-startyear;
generate firm_age_sq=firm_age^2;

/* Impute labour from year and ent_type indicators plus startyear, revenues, profits and assets. The result is stored in labour2. */
/* NOTE(review): the statements visible in this file go on to use labour, not labour2 - confirm whether ln_labour was meant to be built from the imputed series. */
xi: impute labour i.year i.ent_type startyear revenues profits assets, generate(labour2);
/* Log of labour plus one, so that a labour value of zero maps to zero. */
generate ln_labour=ln(labour+1);
/* Copy ln_labour on the row numbered 1 within each iddn2 (panel_count is assigned earlier in this file). */
generate ln_labour_1=ln_labour if panel_count==1;

#delimit;
/* Spread that single value to every row of the same iddn2. */
by iddn2, sort: egen ln_labour_est=max(ln_labour_1);



#delimit;
/* Attach the two provincial control files on pci_id and year.
   These merges previously used the old merge varlist using form, which does not verify
   that the key identifies single rows in the using file. They now use merge m:1, the
   same form as the other merges in this file, so Stata stops with an error if a using
   file holds more than one row per pci_id and year. */
drop _merge;
sort pci_id year;
merge m:1 pci_id year using "C:\Users\ejm5\Dropbox\IDS_PAPI\data\provincial_controls_more.dta";

drop _merge;
sort pci_id year;
merge m:1 pci_id year using "C:\Users\ejm5\Dropbox\IDS_PAPI\data\provincial_controls_more2.dta";

/* Remove rows without an iddn2 and re-declare the panel.
   The _merge variable from the last merge is left in the data, as before. */
drop if iddn2==.;
xtset iddn2 year;

#delimit;
/* For each control below, take its value on the row where panel_count equals 1 and
   copy that value to every row of the same iddn2 in a variable ending in _est.
   The intermediate variables ending in _1 are left in place by this section. */

/* Log population. */
generate ln_pop=log(population_new);
generate ln_pop_1=cond(panel_count==1, ln_pop, .);
bysort iddn2: egen ln_pop_est=max(ln_pop_1);

/* Log of GDP_constant. */
generate ln_gdp=log(GDP_constant);
generate ln_gdp_1=cond(panel_count==1, ln_gdp, .);
bysort iddn2: egen ln_gdp_est=max(ln_gdp_1);

/* Roads. */
generate roads_1=cond(panel_count==1, roads, .);
bysort iddn2: egen roads_est=max(roads_1);

/* secondary_new. The output name secondry_est is spelled exactly as in the original. */
generate secondary_1=cond(panel_count==1, secondary_new, .);
bysort iddn2: egen secondry_est=max(secondary_1);

/* tel_cap and ln_investment follow the same naming pattern, so they share one loop. */
foreach base in tel_cap ln_investment {;
    generate `base'_1=cond(panel_count==1, `base', .);
    bysort iddn2: egen `base'_est=max(`base'_1);
};


#delimit;
set more off;
/* For every listed index variable, copy its value from the row where panel_count equals 1
   to all rows of the same iddn2, stored under the same name with an _est suffix.
   The temporary _1 column is removed inside the loop. */
local pci_indices
    tsub1_entry tsub2_land tsub3_trans tsub4_time tsub5_informal
    tsub6_bias tsub7_proactivity tsub8_psd tsub9_labor tsub10_legal
    t_unweighted;
foreach idx of local pci_indices {;
    generate `idx'_1=cond(panel_count==1, `idx', .);
    bysort iddn2: egen `idx'_est=max(`idx'_1);
    drop `idx'_1;
};



/* Numeric identifiers for each pci_id and year combination, and for each iddn2 and year combination. */
egen  pci_year= group(pci_id year);
egen firm_year=group(iddn2 year);
/* Remove the intermediate _1 variables created above. */
/* NOTE(review): tel_cap_1 is also created earlier in this file but is not in this list, so it stays in the data - confirm whether that is intended. */
drop ln_labour_1 ln_pop_1 ln_gdp_1 roads_1 secondary_1 ln_investment_1;


#delimit;
/* large_firm flags rows whose ln_labour_est is at or above the 75th percentile of ln_labour_est.
   Stata orders missing values above every number, so a missing ln_labour_est satisfies
   the greater-or-equal test. The guard must therefore be on ln_labour_est itself.
   The earlier version guarded on ln_labour, which coded rows with a missing
   ln_labour_est as large and left rows with a missing ln_labour unclassified.
   generate and replace do not overwrite r(p75), so it can be used in both statements. */
sum ln_labour_est, detail;
generate large_firm=1 if ln_labour_est>=r(p75) & !missing(ln_labour_est);
replace large_firm=0 if ln_labour_est<r(p75);

/* One-period lag of planning under the xtset iddn2 year declaration made earlier in this file. */
generate lag_planning=l.planning;

#delimit;
/* Count of non-missing iddn2 values per year among rows where fdi or private equals 1. */
tabstat iddn2 if fdi==1|private==1, stat(n) col(var) by(year);


#delimit;
/* Write the finished panel to disk, overwriting any existing file of the same name. */
save  "C:\Users\ejm5\Dropbox\GSO Enterprise Census (1)\20130623_PanelData\2005to2011_panel_1052013.dta", replace;
